Ok we’ve looked at a lot, maybe streamline to investigate grid size and whether a different grid size can make things more robust
we did 8x8 so we can go smaller or larger
5x5, 6x6, 10x10
could do ensemble members instead of ensemble averages
maybe we don’t do the som at all, maybe we just plot a 4 panel map for each ESM with the color coming from T, P, iasd. The color is sort of interpretable, especially with all of the same ESMs on the same color wheel
And some package and labeling info
library(ggplot2)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyr)
library(kohonen)
library(vegan) # for vegdist function which gives a dissimilarity index
## Loading required package: permute
## Loading required package: lattice
## This is vegan 2.6-4
timeseries_dir <- 'extracted_timeseries/'
# get ecs ordering/labels
esm_labels <- read.csv(paste0(timeseries_dir,'global_tas_allesms.csv'), stringsAsFactors = FALSE) %>%
select(esm) %>% distinct %>%
mutate(plotesm = paste0(letters[as.integer(row.names(.))], '.', esm),
ECS_order = as.integer(row.names(.)))
print(esm_labels)
## esm plotesm ECS_order
## 1 CAMS-CSM1-0 a.CAMS-CSM1-0 1
## 2 MIROC6 b.MIROC6 2
## 3 GFDL-ESM4 c.GFDL-ESM4 3
## 4 FGOALS-g3 d.FGOALS-g3 4
## 5 MPI-ESM1-2-HR e.MPI-ESM1-2-HR 5
## 6 MPI-ESM1-2-LR f.MPI-ESM1-2-LR 6
## 7 MRI-ESM2-0 g.MRI-ESM2-0 7
## 8 ACCESS-ESM1-5 h.ACCESS-ESM1-5 8
## 9 IPSL-CM6A-LR i.IPSL-CM6A-LR 9
## 10 CESM2-WACCM j.CESM2-WACCM 10
## 11 UKESM1-0-LL k.UKESM1-0-LL 11
## 12 CanESM5 l.CanESM5 12
process_time_series <- function(time_series_df, esm_label_info,
hist_start = 1995, hist_end = 2014){
# Get ensemble member values for projection runs:
# 1. 2080-2099 average
# 2. loess detrend each ensemble member to get IAV
#
# split by run
non_hist <- time_series_df %>%
filter(experiment != 'historical') %>%
select(year, ann_agg, esm, experiment, ensemble, variable, region)
grouped <- split(non_hist, f = list(non_hist$esm,
non_hist$experiment,
non_hist$ensemble,
non_hist$variable ,
non_hist$region) )
# split creates group of every possible combo of the variables and fills in
# empty dataframes for the ones that don't exist in data. Drop those
grouped <- grouped[lapply(grouped, nrow)>0]
processed_groups <- lapply(grouped, FUN = function(run_df){
loess_resids <- loess(run_df$ann_agg ~ run_df$year)$residuals
run_df %>%
filter(year >= 2080, year <= 2099) %>%
group_by(esm, experiment, ensemble, variable, region) %>%
summarise(average_2080_2099 = mean(ann_agg)) %>%
ungroup %>%
mutate(iasd = sd((loess_resids))) ->
output_df
return(output_df)
})
individual_stats <- do.call(bind_rows, processed_groups)
rm(non_hist)
rm(grouped)
rm(processed_groups)
# calculate ensemble averages
individual_stats %>%
group_by(esm, experiment, variable,region) %>%
summarise(average_2080_2099 = mean(average_2080_2099),
iasd = mean(iasd)) %>%
ungroup ->
ensemble_stats
# get ensemble average historical average value:
time_series_df %>%
filter(experiment == 'historical',
year >= hist_start,
year <= hist_end) %>%
group_by(esm, experiment, ensemble, variable, region) %>%
summarise(historical_average = mean(ann_agg)) %>%
ungroup %>%
group_by(esm, experiment, variable, region) %>%
summarise(historical_average = mean(historical_average)) %>%
ungroup %>%
select(-experiment) ->
historical_ens
# shape and calculate changes for plotting:
ensemble_stats %>%
left_join(historical_ens, by = c('esm', 'variable', 'region')) %>%
left_join(esm_label_info, by = 'esm') %>%
mutate(change = average_2080_2099 - historical_average,
pct_change = 100*(average_2080_2099 - historical_average)/historical_average) ->
plot_tbl
return(plot_tbl)
}
process_ens_time_series <- function(time_series_df, esm_label_info,
hist_start = 1995, hist_end = 2014){
# Get ensemble member values for projection runs:
# 1. 2080-2099 average
# 2. loess detrend each ensemble member to get IAV
#
# split by run
non_hist <- time_series_df %>%
filter(experiment != 'historical') %>%
select(year, ann_agg, esm, experiment, ensemble, variable, region)
grouped <- split(non_hist, f = list(non_hist$esm,
non_hist$experiment,
non_hist$ensemble,
non_hist$variable ,
non_hist$region) )
# split creates group of every possible combo of the variables and fills in
# empty dataframes for the ones that don't exist in data. Drop those
grouped <- grouped[lapply(grouped, nrow)>0]
processed_groups <- lapply(grouped, FUN = function(run_df){
loess_resids <- loess(run_df$ann_agg ~ run_df$year)$residuals
run_df %>%
filter(year >= 2080, year <= 2099) %>%
group_by(esm, experiment, ensemble, variable, region) %>%
summarise(average_2080_2099 = mean(ann_agg)) %>%
ungroup %>%
mutate(iasd = sd((loess_resids))) ->
output_df
return(output_df)
})
individual_stats <- do.call(bind_rows, processed_groups)
rm(non_hist)
rm(grouped)
rm(processed_groups)
# calculate ensemble averages
individual_stats ->
ensemble_stats
# get ensemble average historical average value:
time_series_df %>%
filter(experiment == 'historical',
year >= hist_start,
year <= hist_end) %>%
group_by(esm, experiment, ensemble, variable, region) %>%
summarise(historical_average = mean(ann_agg)) %>%
ungroup %>%
group_by(esm, experiment, variable, region) %>%
summarise(historical_average = mean(historical_average)) %>%
ungroup %>%
select(-experiment) ->
historical_ens
# shape and calculate changes for plotting:
ensemble_stats %>%
left_join(historical_ens, by = c('esm', 'variable', 'region')) %>%
left_join(esm_label_info, by = 'esm') %>%
mutate(change = average_2080_2099 - historical_average,
pct_change = 100*(average_2080_2099 - historical_average)/historical_average) ->
plot_tbl
return(plot_tbl)
}
prep_esm_TP_data <- function(esmname){
region_timeseries <- read.csv(paste0(timeseries_dir, 'IPCC_land_regions_tas_', esmname, '_timeseries_1980_2099.csv'),
stringsAsFactors = FALSE) %>% mutate(region = acronym)
region_tas_summary <- suppressMessages(process_time_series(time_series_df = region_timeseries, esm_label_info = esm_labels))
region_pr_summary <- suppressMessages(process_time_series(time_series_df =
read.csv(paste0(timeseries_dir, 'IPCC_land_regions_pr_', esmname,'_timeseries_1980_2099.csv'),
stringsAsFactors = FALSE) %>%
rename(ann_agg=pr) %>%
mutate(region = acronym) ,
esm_label_info = esm_labels))
# reshape so each row is an observation
# observation = esm - experiment - region - tas change-tas iasd - pr pct change
region_tas_summary %>%
select(esm, experiment, region, iasd, change) %>%
rename(tas_change = change) %>%
left_join(region_pr_summary %>%
select(esm, experiment, region, pct_change) %>%
rename(pr_pct = pct_change),
by = c('esm', 'experiment', 'region')) ->
region_summary
return(region_summary)
}
Consider an observation = esm - experiment - region : tas change-tas iasd - pr pct change
# region_summary <- prep_esm_TP_data(esmname = 'GFDL-ESM4')
# region_summary %>%
# bind_rows(prep_esm_TP_data(esmname = 'CESM2-WACCM')) %>%
# bind_rows(prep_esm_TP_data(esmname = 'CAMS-CSM1-0'))->
# region_summary_main
# write.csv(region_summary_main, 'CAMS_GFDL_CESM2-WACCM_summaries.csv', row.names = F)
region_summary_main <- read.csv('CAMS_GFDL_CESM2-WACCM_summaries.csv', stringsAsFactors = FALSE)
# make a copy to operate on
region_summary <- region_summary_main
print(head(region_summary))
## esm experiment region iasd tas_change pr_pct
## 1 GFDL-ESM4 ssp119 ARP 0.2652738 0.2610944 2.9927902
## 2 GFDL-ESM4 ssp119 CAF 0.2703431 0.5037959 -1.9838156
## 3 GFDL-ESM4 ssp119 CAR 0.1574689 0.2216722 -0.3187443
## 4 GFDL-ESM4 ssp119 CAU 0.5345113 0.4218907 -3.8398722
## 5 GFDL-ESM4 ssp119 CNA 0.4910504 0.9446315 6.3459839
## 6 GFDL-ESM4 ssp119 EAS 0.2584605 0.7250376 10.5204045
default SOM packages can only operate on numerical data, not categorical. So we have to assign some amount of spatial location numerical info to each acronym. Ideally mean lat-lon in the shape?
Also we want the shapes for plotting anyway, so prep them
library(sf)
## Linking to GEOS 3.10.2, GDAL 3.4.2, PROJ 8.2.1; sf_use_s2() is TRUE
shp <- st_read(dsn = 'IPCC-WGI-reference-regions-v4_shapefile/IPCC-WGI-reference-regions-v4.shp', stringsAsFactors = F)
## Reading layer `IPCC-WGI-reference-regions-v4' from data source
## `/Users/snyd535/Documents/GitHub/stitches_in_r/R/inst/shinyApp/python_curation/IPCC-WGI-reference-regions-v4_shapefile/IPCC-WGI-reference-regions-v4.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 58 features and 4 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: -180 ymin: -90 xmax: 180 ymax: 90
## Geodetic CRS: WGS 84
# add a numerical region id
shp %>%
mutate(region_id = as.integer(row.names(.))) ->
shp
# add coordinate info probably
shp1 <- st_transform(shp, "+proj=longlat +ellps=WGS84 +datum=WGS84")
# extract
coords <- as.data.frame(st_coordinates(shp1))
# get a mean lon and lat value in each shape
coords %>%
rename(lon = X, lat = Y, region_id = L3) %>%
left_join(as.data.frame(shp) %>% select(region_id, Acronym), by = 'region_id') %>%
filter(grepl('PO', Acronym)) %>%
# have to have lon on 0:360 so th pacific ocean behaves even though not
# looking at that here
mutate(lon_360 = if_else(lon >=0, lon, lon+360))%>%
group_by(region_id) %>%
summarise(mean_lon = mean(lon_360),
mean_lat = mean(lat)) %>%
ungroup %>%
mutate(mean_lon = if_else(mean_lon >= 0 & mean_lon <= 180,
mean_lon, mean_lon - 360) ) ->
mean_pts_PO
coords %>%
rename(lon = X, lat = Y, region_id = L3) %>%
left_join(as.data.frame(shp) %>% select(region_id, Acronym), by = 'region_id') %>%
filter(!grepl('PO', Acronym)) %>%
# have to have lon on 0:360 so th pacific ocean behaves even though not
# looking at that here
group_by(region_id) %>%
summarise(mean_lon = mean(lon),
mean_lat = mean(lat)) %>%
ungroup %>%
bind_rows(mean_pts_PO)->
mean_pts
# Join to the shape file and make sure this very simple way of
# doing things ends up with a lon lat that is actually in each region
shp %>%
left_join(mean_pts, by = 'region_id') ->
shp
ggplot() +
geom_sf(data = shp ) +
geom_point(data = shp, mapping = aes(x = mean_lon, y = mean_lat), color = 'red') +
geom_text(data = shp, mapping = aes(label = Acronym, x = mean_lon, y= mean_lat), size =5)
we have 3 variables per observarion = experimentXregion -> convert to rgb values
looking across ESMs, we’ll want to make sure we have them all on consistent range before converting to (0,255)
each color/family of colors does have a physical interpretation in terms of iasd, t, p
red = temperature
blue = precip
iasd = green
# convert to RGB
region_summary %>%
filter(experiment != 'ssp119') ->
region_summary
region_summary$r <- scales::rescale(region_summary$tas_change, to =c(0,255))
region_summary$g <- scales::rescale(region_summary$iasd, to =c(0,255))
region_summary$b <- scales::rescale(region_summary$pr_pct, to =c(0,255))
# add spatial numerical info
region_summary %>%
left_join(as.data.frame(shp) %>% select(Acronym, mean_lon, mean_lat), by = c('region' = 'Acronym')) %>%
rename(lon = mean_lon, lat = mean_lat) %>%
# add the original colors
mutate(orig_color = rgb(.$r, .$g, .$b, maxColorValue = 255),
color_order = as.integer(row.names(.))) ->
region_summary
region_summary %>%
select(color_order, orig_color) %>%
distinct() ->
colors
region_numerical <- as.data.frame(region_summary[c('lon', 'lat', 'r', "g", "b")])
Green dominant = iasd dominates T and P
red dominant = T dominates iasd and P
blue dominant = P dominates T and IASD
black: magnitude of (r,g,b) overall small/near min value of T,P, IASD across SSPs and ESMs as set up
white: magnitude of (r,g,b) overall large/near max value of T,P, IASD across SSPs and ESMs as set up
MAJOR downside - don’t have a clear increasing vs decreasing interpretation with how it’s set up.
shp %>%
left_join(region_summary, by = c('Acronym' = 'region')) %>%
left_join(esm_labels %>% select(esm, plotesm), by = 'esm') ->
shp_esms
p<- ggplot() +
geom_sf(data = shp_esms %>% na.omit, aes(fill = as.factor(color_order)) ) +
scale_fill_manual(values =colors$orig_color)+
facet_grid(plotesm~experiment ) +
theme(legend.position = 'none')
ggsave('python_curation_figs/CAMS_GFDL_CESM_rawdata.png', p, width =8, units = 'in')
## Saving 8 x 5 in image
p
# only decreasing precip
shp_esms %>%
mutate(precip_change = if_else(pr_pct >=0, 'inc', 'dec'))->
shp_esms2
ggplot() +
geom_sf(data = shp_esms2 %>% na.omit, aes(fill = as.factor(color_order), color = precip_change )) +
scale_fill_manual(values =colors$orig_color)+
scale_color_manual(values = c('red', 'blue'))+
facet_grid(plotesm~experiment ) +
theme(legend.position = 'none')
ggplot() +
geom_sf(data = shp_esms2 %>% na.omit, aes(color = precip_change )) +
scale_fill_manual(values =colors$orig_color)+
scale_color_manual(values = c('red', 'blue'))+
facet_grid(plotesm~experiment )
ggplot() +
geom_sf(data = shp_esms2 %>% na.omit, aes(fill = as.factor(color_order), color = precip_change )) +
scale_fill_manual(values =colors$orig_color)+
scale_color_manual(values = c('yellow', 'blue'))+
facet_grid(plotesm~experiment ) +
theme(legend.position = 'none')
ggplot() +
geom_sf(data = shp_esms2 %>% na.omit, aes(color = precip_change )) +
scale_fill_manual(values =colors$orig_color)+
scale_color_manual(values = c('yellow', 'blue'))+
facet_grid(plotesm~experiment )
# same thing but black is low magnitude therefore decreasing P, white is high and therefore inc P
ggplot() +
geom_sf(data = shp_esms2 %>% na.omit, aes(fill = as.factor(color_order), color = precip_change )) +
scale_fill_manual(values =colors$orig_color)+
scale_color_manual(values = c('black', 'white'))+
facet_grid(plotesm~experiment ) +
theme(legend.position = 'none')
ggplot() +
geom_sf(data = shp_esms2 %>% na.omit, aes( color = precip_change )) +
scale_color_manual(values = c('black', 'white'))+
facet_grid(plotesm~experiment ) +
theme(legend.position = 'none')
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
fig <- plot_ly(region_summary, x = ~tas_change, y = ~iasd, z = ~pr_pct, color = ~orig_color,
colors = region_summary$orig_color)
fig
## No trace type specified:
## Based on info supplied, a 'scatter3d' trace seems appropriate.
## Read more about this trace type -> https://plotly.com/r/reference/#scatter3d
## No scatter3d mode specifed:
## Setting the mode to markers
## Read more about this attribute -> https://plotly.com/r/reference/#scatter-mode
fig <- plot_ly(region_summary %>% filter(experiment == 'ssp126'), x = ~tas_change, y = ~iasd, z = ~pr_pct, color = ~orig_color,
colors = (region_summary %>% filter(experiment == 'ssp126'))$orig_color)
fig
## No trace type specified:
## Based on info supplied, a 'scatter3d' trace seems appropriate.
## Read more about this trace type -> https://plotly.com/r/reference/#scatter3d
## No scatter3d mode specifed:
## Setting the mode to markers
## Read more about this attribute -> https://plotly.com/r/reference/#scatter-mode
fig <- plot_ly(region_summary %>% filter(experiment == 'ssp245'), x = ~tas_change, y = ~iasd, z = ~pr_pct, color = ~orig_color,
colors = (region_summary %>% filter(experiment == 'ssp245'))$orig_color)
fig
## No trace type specified:
## Based on info supplied, a 'scatter3d' trace seems appropriate.
## Read more about this trace type -> https://plotly.com/r/reference/#scatter3d
## No scatter3d mode specifed:
## Setting the mode to markers
## Read more about this attribute -> https://plotly.com/r/reference/#scatter-mode
fig <- plot_ly(region_summary %>% filter(experiment == 'ssp370'), x = ~tas_change, y = ~iasd, z = ~pr_pct, color = ~orig_color,
colors = (region_summary %>% filter(experiment == 'ssp370'))$orig_color)
fig
## No trace type specified:
## Based on info supplied, a 'scatter3d' trace seems appropriate.
## Read more about this trace type -> https://plotly.com/r/reference/#scatter3d
## No scatter3d mode specifed:
## Setting the mode to markers
## Read more about this attribute -> https://plotly.com/r/reference/#scatter-mode
fig <- plot_ly(region_summary %>% filter(experiment == 'ssp585'), x = ~tas_change, y = ~iasd, z = ~pr_pct, color = ~orig_color,
colors = (region_summary %>% filter(experiment == 'ssp585'))$orig_color)
fig
## No trace type specified:
## Based on info supplied, a 'scatter3d' trace seems appropriate.
## Read more about this trace type -> https://plotly.com/r/reference/#scatter3d
## No scatter3d mode specifed:
## Setting the mode to markers
## Read more about this attribute -> https://plotly.com/r/reference/#scatter-mode